library(plotly)
library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(leaflet)
library(lehdr)
library(ggplot2)
library(ggthemes)

options(
  tigris_class = "sf",
  tigris_use_cache = T 
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Updated 05/11/2020 # Mapping distribution of essential workers by block group for San Jose Analysis: Using the CA Essential Business Guidelines,the LODES dataset and the QWI dataset, the % of essential workers (by NAICS code) was mapped to each block group in San Jose.

Methodology

The data used to assess the % of workers deemed essential was determined using the following approach. The CA Essential proportion of 6-digit NAICS codes in each 4-digit NAICS was assessed here using a data set prepared by the CEE 218Z Vulnerability Team (linked here:https://docs.google.com/spreadsheets/d/1piMnUpohkY-HquAhuKEzeJVhCdmxTX75b8S6mnNjxQY/edit#gid=0). The CEE 218Z team read through the “Essential Critical Infrastructure Order” March 22nd 2020 and manually assigned the “essential”/“nonessential” designation by reading the order and control “F” searching through the list of 6-digit NAICS codes. There were two methods of analysis. Method 1: 4-digit NAICS codes were weighted by fraction of 6-digit NAICS codes in the 4-digit NAICS code. Then this fraction was multiplied by the number of employees in each 4-digit NAICS code (using LODES data). Then proportion of essential workers in each 2-digit NAICS code was calculated by dividing number of essential employees in each 4-digit NAICS code by the number of total employees.

Method 2: Only 2-digit NAICS codes with >= 50% of the 6-digit sub-codes were marked as essential.

Following calculation of the fraction of essential workers in each block group. It was mapped using leaflet.

First get the block groups in San Jose

scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)

# Use tracts sent to us by San Jose
dir <- "pCloud Drive/SFBI/Data Library"
sj_tracts <- st_read(file.path(dir,"San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp")) %>% 
  st_as_sf() %>% 
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS:            2227
sj_citycouncil_disticts <- st_read(file.path(dir, "San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp")) %>% 
  st_as_sf() %>% 
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS:            2227
# from code written by others to get SJ blockgroups
sj_blockgroups <- 
  scc_blockgroups %>% 
  st_centroid() %>% 
  st_join(sj_tracts, left = F) %>% 
  st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>% 
  mutate(
    DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
  ) %>% 
  st_set_geometry(NULL) %>% 
  left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>% 
  st_as_sf() %>% 
  dplyr::select(GEOID, DISTRICTS)

# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9

sj_boundary <- 
  places("CA", cb=F, progress_bar=F) %>% 
  filter(NAME == "San Jose")

Next obtain the distribution of workers from the LODES dataset

# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)

# get the LODES data
sj_rac <- 
  grab_lodes(
    state = "ca", 
    year = 2017, 
    lodes_type = "rac", 
    job_type = "JT01",
    segment = "S000", 
    state_part = "main",
    agg_geo = "bg"
  ) %>% 
  left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>% 
  filter(!is.na(DISTRICTS)) %>%
  dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))

Get QWI Data which tells us number of works in each 4 digit NAICS code

# Get distribution of jobs in Santa Clara County
#Get NAICS codes
label_industry <-
  read_csv(file.path(dir,"NAICS/label_industry.csv"))
#GetQWI data
qwi_scc <- NULL
for(years in 2017){
  qwi<-
    getCensus(
      name = "timeseries/qwi/sa",
      region = "county:085",
      regionin = "state:06",
      vars = c("EmpS","EarnS","industry","ind_level"),
      time = years
    ) %>%
    filter(ind_level == 4) %>%
    mutate(
      year = substr(time,1,4)
    ) %>%
    left_join(label_industry, by= "industry") %>%
    group_by(year,industry,label) %>%
    summarize(
      EmpS = round(mean(as.numeric(EmpS), na.rm = TRUE),0),
      EarnS = round(mean(as.numeric(EarnS), na.rm = TRUE),0)
    ) %>%
    filter(!is.na(EmpS) & EmpS != 0)
  qwi_scc<-
    rbind(qwi_scc,qwi)
}
arrange(qwi, desc(EmpS))
## # A tibble: 268 x 5
## # Groups:   year, industry [268]
##    year  industry label                                               EmpS EarnS
##    <chr> <chr>    <chr>                                              <dbl> <dbl>
##  1 2017  5415     Computer Systems Design and Related Services       74335 15200
##  2 2017  7225     Restaurants and Other Eating Places                54063  2217
##  3 2017  5191     Other Information Services                         45339 34325
##  4 2017  3341     Computer and Peripheral Equipment Manufacturing    42593 21186
##  5 2017  3344     Semiconductor and Other Electronic Component Manu… 38376 19610
##  6 2017  6111     Elementary and Secondary Schools                   36927  5059
##  7 2017  6221     General Medical and Surgical Hospitals             31975  8206
##  8 2017  6241     Individual and Family Services                     24505  1623
##  9 2017  6113     Colleges, Universities, and Professional Schools   23194  8034
## 10 2017  5112     Software Publishers                                21594 26285
## # … with 258 more rows

Get Essential Deisgnations

#Here I will use the Vulnerability Team's California Essential Business NAICS code list according to California State Public Health Officer's Report 
dir <- "/Users/spencerrobinson/pCloud Drive/SFBI/Data Library"
CA_essential <- read_csv(file.path(dir,'Essential Designations/ca_essential_designations.csv'))
CA_essential<- CA_essential %>%
  mutate(`CA Essential` = replace_na(`CA Essential`, FALSE)) 
CA_essential$`CA Essential` <-  as.logical(CA_essential$`CA Essential`) 

#Here I use Delaware's Essential Business NAICS code list (only goes to 4 digit)
DE_essential <- read_csv(file.path(dir, 'Essential Designations/delaware_essential_designations.csv'))

Spencer Method 1: Use CA Essential 6-digit NAICS codes to assess Fraction-A: Fraction of 6-digit NAICS codes in each 4-digit NAICS code. Use QWI data to assess Fraction-B: Fraction of 4-digit NAICS code workers in each 2-digit NAICS code category (corresponds to LODES data). To determine % essential workers we multiply Fraction-A by Fraction-B.

CA_essential_weighted_grouped_ca1<- CA_essential %>% 
  mutate(NAICS_4_dig = str_sub(NAICS, 1,4)) %>%
  group_by(NAICS_4_dig, `CA Essential`) %>% 
  summarize(count = n()) %>% 
  ungroup() %>% 
  spread(`CA Essential`,count) %>% 
  mutate(`FALSE` = replace_na(`FALSE`, 0), `TRUE` = replace_na(`TRUE`, 0)) %>% 
  rename(c("essential" = "TRUE", "nonessential" = "FALSE")) %>% 
  mutate(totalCount = essential + nonessential, fracEssential_4dig = essential / totalCount) %>% #FractionA 
  full_join(qwi_scc, by = c('NAICS_4_dig' = 'industry')) %>% # Bring in QWI data with number of workers 4digit NAICS
  mutate(EmpS = replace_na(EmpS,0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
  mutate(fracEssential_4dig =replace(fracEssential_4dig, NAICS_4_dig== "8141", 0), EmpS = replace(EmpS, NAICS_4_dig == "8141",0)) %>%   #Manually assign NAICS code 8141 as nonessential 
  mutate(essEmps = EmpS*fracEssential_4dig) %>%  #Number of Essenital employees in each 4 digit category 
  mutate(NAICS_2_dig = str_sub(NAICS_4_dig, 1,2)) %>%
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% #Bring in LODES data with 
  group_by(LODES_variable) %>% 
  summarise(EmpS = sum(EmpS), essEmps = sum(essEmps)) %>% 
  mutate(fracEssential_2dig = essEmps/EmpS) %>% 
  mutate(df = "CA1")

Simone’s Method 1 with DE Essential Business Data: Use the QWI data to find the distribution of workers in NAICS 4-digit codes in Santa Clara County, and use this to weight the contributions to each 2-digit code. Note that this method disregards information about 6 digit codes within the 4 digit code that differ from the broader 4 digit code; this could be adjusted, but without data on the distribution of 6 digit within 4 digit I thought it was best to leave out.

#CA Fraction Employed; DE designations
CA_essential_weighted_grouped_de1 <- DE_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,4)) %>% 
  full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
  mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% 
  mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
  group_by(LODES_variable, Essential) %>% 
  summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential 
  mutate(Essential = replace_na(Essential, TRUE)) %>% #CNS20 or NAICS code 92 isn't in the DE dataset and is given as NA, but it is public administration and we assume  essential
  ungroup() %>% 
  spread(Essential, totalWorkers, fill = 0) %>%
  rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>% 
  mutate(totalCount = Essential + Nonessential, fracEssential_2dig = Essential / totalCount) %>% 
  mutate(df = "DE") %>% 
  select("LODES_variable","fracEssential_2dig","df")
#Let's compare the fraction essential for Spencer Method 1 (CA Designations) v. Simone Method 1 (DE Designations)
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_de1)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential_2dig, fill = df))+
  geom_bar(stat = "identity", position = position_dodge())+
  ggtitle(" Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
  theme_fivethirtyeight()+
  theme(
    plot.title = element_text(size=14, face="bold.italic"),
    axis.text.x =  element_text(angle = 90, size=14),
    axis.title.y = element_text(size=14)
  )
compare_graph

# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_weighted_grouped_ca1 %>% dplyr::select(fracEssential_2dig, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  mutate(numEssential = fracEssential_2dig * Number) %>%
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(numEssential)) %>%
  ungroup() %>%
  mutate(fracEssential_2dig = round((totalEssential / C000)*100, 1))

sj_rac_essential_de1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_weighted_grouped_de1 %>% dplyr::select(fracEssential_2dig, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  mutate(numEssential = fracEssential_2dig * Number) %>%
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(numEssential)) %>%
  ungroup() %>%
  mutate(fracEssential_2dig = round((totalEssential / C000)*100, 1))

Map of method 1 using California Essential Business Designations

# set up palette
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(sj_rac_essential_ca1 %>% 
        pull(fracEssential_2dig) %>% 
        unique())
)

ca_map1 <- leaflet(data = sj_rac_essential_ca1) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_ca1 %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential_2dig),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential_2dig,"% of workers that are essential by block group"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  )%>% 
  addLegend(pal = blue_pal, values = ~fracEssential_2dig, opacity = 1, title = "% Workers in Essential Bus.") 
ca_map1

Map of method 1 using Delaware Essential Business Designations

red_pal <- colorNumeric(
  palette = "Reds",
  domain = 
    c(sj_rac_essential_de1 %>% 
        pull(fracEssential_2dig) %>% 
        unique())
)

de_map1 <- leaflet(data = sj_rac_essential_de1) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_de1 %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~red_pal(fracEssential_2dig),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential_2dig,"% of workers that are essential by block group "),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = red_pal, values = ~fracEssential_2dig, opacity = 1, title = "% Workers in Essential Bus.")
de_map1 

Spencer Method 2: Only count as essential the 2 digit NAICS codes with >=80% of the 6 digit sub-codes marked as essential

CA_essential_grouped <- CA_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,6)) %>% #Convert 6 digit NAICS to string
  mutate(NAICS_2_dig = str_sub(NAICS, 1,2)) %>% #Get 2 digit NAICS which correspond to LODES
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% 
  group_by(LODES_variable, `CA Essential`) %>% 
  summarize(Count = n()) %>%
  spread(`CA Essential`, Count) %>% 
  mutate(`FALSE` = replace_na(`FALSE`,FALSE), `TRUE` = replace_na(`TRUE`,TRUE)) %>% 
  rename(c("essential" = "TRUE", "nonessential" = "FALSE")) %>% 
  mutate(totalCount = essential+nonessential, fracEssential = essential/totalCount) %>% 
  mutate(df = "CA2")

CA_essential <- ggplot(CA_essential_grouped, aes(fracEssential))+ 
  geom_histogram(binwidth = 0.1)

CA_essential_grouped <- CA_essential_grouped %>% mutate(binaryEssential = fracEssential >= 0.5) # 0.5 used because of low fracEssential values for most NAICS codes
# get essential breakdown in San Jose - CA designations
sj_rac_essential_grouped <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_grouped %>% dplyr::select(binaryEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  filter(binaryEssential == TRUE) %>% 
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(Number)) %>%
  ungroup() %>%
  mutate(fracEssential = round((totalEssential / C000)*100, 1))

essential_workers_visualize <- ggplot(data = sj_rac_essential_grouped, aes(x = fracEssential))+
  geom_histogram()+
  labs(y = "Number of Block Groups", x = "% Essential Workers")+
  ggtitle("Distribution of Block Group Essential Worker Data ")
 
essential_workers_visualize

Map of method 2 using California Essential Business Designations

# set up palette
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(sj_rac_essential_grouped %>% 
        pull(fracEssential) %>% 
        unique())
)

ca_map2 <- leaflet(data = sj_rac_essential_grouped) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_grouped %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"%"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")

ca_map2